#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import sys
import glob
import argparse
import re
import time
import warnings
import my

# Example command-line arguments:
# -i /scratch0/tmp/myourshaw/gmd/vcfs/gmd32.analysis_ready.vcf -o /scratch0/tmp/myourshaw/gmd/vcfs/gmd32.analysis_ready.vcf.brlmm -a /home/myourshaw/apps/Linkdatagen/MPS/annotHapMap2/annotHapMap2.txt

def _read_annotations(annot_path):
    """Load the SNP annotation table into a dict keyed by 'chrom:position'.

    The first line of the file is the header. Its column names are
    lower-cased and become the keys of every per-row dict. Header and data
    lines are split on runs of whitespace, and blank lines after the header
    are skipped.

    Format notes carried over from the original author's comments (this
    function does not validate them): annotation files use an internal
    format generated by Katherine Smith, with columns such as
        Probe_set_ID  rs_name  Chrom  Strand  deCODE_genetic_map_position
        physical_position_build37  allele_frequencies_CEU
    (other populations to come). An older layout listed Affy_SNP_name,
    rs_name, Chrom, Strand, deCODE_genetic_map_position,
    physical_position_(bp), alleleA/alleleB and allele frequencies for
    CEU, ASW, CHB, CHD, GIH, JPT, LWK, MEX, MKK, TSI and YRI.

    Args:
        annot_path: path of the annotation file to read.

    Returns:
        dict mapping '<chrom>:<physical_position_build37>' to a dict of
        {lower-cased column name: value string} for that row. When several
        rows share a key, the last one read wins.

    Raises:
        KeyError: if a data row has no value for the 'chrom' or
            'physical_position_build37' column.
    """
    annotations = {}
    with open(annot_path) as annotfile:
        header = None
        for line in annotfile:
            if header is None:
                # The first line is always the header, even if it is blank.
                header = line.lower().split()
                continue
            if not line.strip():
                continue
            # Pair each value with its column by position. Looking the
            # column up with list.index(value) would send every repeated
            # value in a row to the first column holding that value and
            # drop the later column entirely.
            annotation = dict(zip(header, line.split()))
            key = annotation['chrom'] + ':' + annotation['physical_position_build37']
            annotations[key] = annotation
    return annotations


def main():
    """Parse the command line and load the annotation file.

    VCF processing is not implemented yet: every input file is opened and
    closed again without being read, and --output is parsed but not used.

    Returns:
        None, so that sys.exit(main()) reports success.
    """
    #command line arguments
    parser = argparse.ArgumentParser(
      description = 'merge multiple vcfs into a set of normalized files for database entry',
      epilog = 'pypeline.vcfs2db version 1.0β1 ©2011-2012 Michael Yourshaw all rights reserved')
    parser.add_argument('--input', '-i', nargs='+',
        help='input vcf files')
    parser.add_argument('--annotfile', '-a', required=True,
        help='annotation file')
    parser.add_argument('--output', '-o', required=True,
        help='output file')
    args = parser.parse_args()

    # Column indexes and genotype pattern (matches e.g. '0/1', '1|1', './.').
    # NOTE(review): none of these names is used in this function yet;
    # presumably they are scaffolding for the unfinished VCF step below.
    #annotfile header
    Probe_set_ID,rs_name,Chrom,Strand,deCODE_genetic_map_position,physical_position_build37,SNP_type,allele_frequencies_CEU,A,B = range(10)
    #VCF header
    CHROM,POS,ID,REF,ALT,QUAL,FILTER,INFO,FORMAT,GT = range(10)
    gt_re = re.compile(r'(?P<allele1>[0-9.]+)(?P<phase>[/|])(?P<allele2>[0-9.]+)', re.I)

    annotations = _read_annotations(args.annotfile)

    # --input is optional, so args.input is None when it is omitted; do not
    # hand None to the glob expander.
    vcfs = my.unglob(args.input) if args.input else []
    for vcf_file in vcfs:
        with open(vcf_file) as vcf:
            pass
    


if __name__ == "__main__":
    # Run the script; whatever main() returns is handed to sys.exit().
    sys.exit(main())
